# Here we follow the script JG sent (blp.py) for the most part (see notes below).
# should replicate col 3 of C&G table 8.
# This file is a mixture that allows for re-specifying the agent_data rather than taking it from the archive
# and also has product_data outside of the pyblp.Problem, which allows for a merge later with new characteristic
# NewIV means that we regenerate the IVs and add the one for space that is originally missing in the demand instrument
#
import pyblp
import numpy as np
import pandas as pd

#product data:
product_data = pd.read_csv(pyblp.data.BLP_PRODUCTS_LOCATION) # 2217*33
product_data.head()
product_data.count()
# replacing the instruments that come inside of product_data
#  with "built" BLP instruments
demand_instruments_new = pyblp.build_blp_instruments(pyblp.Formulation('1 + hpwt + air + mpd + space'), product_data)
type(demand_instruments_new) # it is a numpy array
# delete the 8 pre-existing demand iv
for i in range(8):
    del product_data[f'demand_instruments{i}']
for i, column in enumerate(demand_instruments_new.T):
    product_data[f'demand_instruments{i}'] = column
#
# construct agent data from primitives
sds = pd.read_csv('Data/BLP99/sdincome.csv', header=None, names=['sd'], usecols=[0])
means = pd.read_csv('Data/BLP99/meanincome.csv', header=None, names=['year', 'mean'], usecols=[0, 1])
market_ids = []
weights = []
nodes = []
income = []
for index, (_, row) in enumerate(means.iterrows()):
    integration = pyblp.Integration('halton', 10_000, {
        'discard': 1000 + index * 10_000,
        'seed': index,
    })    
    untransformed_agents = pyblp.build_integration(integration, 6) #
    market_ids.append(np.repeat(1900 + int(row['year']), untransformed_agents.weights.size))
    weights.append(untransformed_agents.weights)
    nodes.append(untransformed_agents.nodes[:, :-1])
    income.append(np.exp(row['mean'] + float(sds['sd']) * untransformed_agents.nodes[:, -1]))

# structure the problem
product_formulations = (
   pyblp.Formulation('1 + hpwt + air + mpd + space'), 
   pyblp.Formulation('1 + prices + hpwt + air + mpd + space'), 
   pyblp.Formulation('1 + log(hpwt) + air + log(mpg) + log(space) + trend')
)
product_formulations
agent_formulation = pyblp.Formulation('0 + I(1 / income)')
agent_formulation
problem = pyblp.Problem(product_formulations, product_data, agent_formulation,agent_data={
        'market_ids': np.concatenate(market_ids),
        'weights': np.concatenate(weights),
        'nodes': np.vstack(nodes),
        'income': np.concatenate(income),
    }, 
    costs_type='log')
problem
initial_sigma = np.diag([3.612, 0, 4.628, 1.818, 1.050, 2.056]) 
initial_sigma
initial_pi = np.c_[[0, -43.501, 0, 0, 0, 0]] 
initial_pi

# solve with BLP instruments 
results = problem.solve(
    initial_sigma,
    initial_pi,
    costs_bounds=(0.001, None),
    W_type='clustered',
    se_type='clustered',
    initial_update=True,
    iteration=pyblp.Iteration('squarem', {'atol': 1e-14, 'max_evaluations': 1000}),
    scale_objective = False,
    optimization=pyblp.Optimization('bfgs', {'gtol': 1e-5, 'maxiter': 1000}),
    method='1s',
)
results
# we don't save 1s results because not reported in col 3 of C&G table 8
#
# optimal instruments
updated_problem = results.compute_optimal_instruments(method='approximate').to_problem()
final_results = updated_problem.solve(
    initial_sigma,
    initial_pi,
    costs_bounds=(0.001, None),
    W_type='clustered',
    se_type='clustered',
    initial_update=True,
    iteration=pyblp.Iteration('squarem', {'atol': 1e-14, 'max_evaluations': 1000}),
    scale_objective = False,
    optimization=pyblp.Optimization('bfgs', {'gtol': 1e-5, 'maxiter': 1000}),
)
elasticities = final_results.compute_elasticities()
means = final_results.extract_diagonal_means(elasticities)
np.mean(means) #
np.savetxt('Output/NewIV/own_elasticities_halton10k_2s.csv',means, delimiter=",")
np.savetxt('Output/NewIV/parameters_halton10k_2s.csv',final_results.parameters, fmt="%6.3f", delimiter=",")
par_cov = np.diagonal(final_results.parameter_covariances)
np.savetxt('Output/NewIV/par_cov_halton10k_2s.csv',par_cov, fmt="%6.3f", delimiter=",")


